#!/usr/bin/env bash

# Train and test a STAViS audiovisual model on the AVAD dataset (single train/test fold-list pair)

# Root directory of this experiment; it is also passed to the training
# command further down via --root_path.
base_path='./experiments/audiovisual_train_test'

# Sub-directory of base_path that is created up front.
results_path='results'
# The whole path is one double-quoted word so it cannot be word-split or
# glob-expanded if either variable is ever changed to contain spaces or
# wildcard characters. -p creates missing parents and is a no-op when the
# directory already exists.
mkdir -p "${base_path}/${results_path}"

# python main_train_one_dataset.py --gpu_devices 0,1 --batch_size 128 --n_threads 12 \
#     --audiovisual True --checkpoint 20 --n_epochs 60 \
#     --root_path ${base_path} --result_path ${results_path} \
#     --pretrain_path ./data/pretrained_models/resnet-50-kinetics.pth  \
#     --audio_pretrain_path ./data/pretrained_models/soundnet8.pth \
# 	--annotation_path_av_train ./data/fold_lists/AVD_list_train_fps.txt \
#     --annotation_path_av_test ./data/fold_lists/AVD_list_test_fps.txt

# python main_train_one_dataset.py --gpu_devices 0,1 --batch_size 128 --n_threads 12 \
#     --audiovisual True --checkpoint 20 --n_epochs 60 \
#     --root_path ${base_path} --result_path ${results_path} \
#     --pretrain_path ./data/pretrained_models/resnet-50-kinetics.pth  \
#     --audio_pretrain_path ./data/pretrained_models/soundnet8.pth \
# 	--annotation_path_av_train ./data/fold_lists/AVD_list_train_fps.txt \
#     --annotation_path_av_test ./data/fold_lists/AVD_list_test_fps.txt

# Run main_one_dataset.py once on the "avad" dataset with --audiovisual True.
#
# The options live in an array so that every value stays exactly one argument
# (in particular "${base_path}", which was previously expanded unquoted) and
# so that no backslash line continuations are needed. Argument order and all
# literal values are unchanged.
#
# NOTE(review): --result_path is a literal here and does not use the
# ${results_path} variable defined at the top of this script; confirm which
# directory the trainer actually writes to.
train_args=(
    --gpu_devices 0,1 --batch_size 64 --n_threads 12
    --result_path ./audiovisual_nonav_results
    --dataset avad
    --pool_layer "avgpool"
    --learning_rate 0.001
    --temp_reduction 2
    --upsample_func "bilinear"
    # Free-text description of the run, passed as a single argument.
    --explain "original audio visual pyramid, avgpool, dsam module"
    --no_use_dsam_att
    --no_use_dsam_multi
    --no_use_transposeConv
    --no_use_spatio_att
    --audiovisual True --checkpoint 20 --n_epochs 21
    --root_path "${base_path}"
    --audio_pretrain_path ./data/pretrained_models/soundnet8.pth
    --annotation_path_av_train ./data/fold_lists/AVD_list_train_fps.txt
    --annotation_path_av_test ./data/fold_lists/AVD_list_test_fps.txt
)

python main_one_dataset.py "${train_args[@]}"